| libraries here |
r library(readxl) library(classdata) library(tidyverse) |
## -- Attaching packages --------------------------------- tidyverse 1.3.0 -- |
## v ggplot2 3.2.1 v purrr 0.3.3 ## v tibble 2.1.3 v dplyr 0.8.3 ## v tidyr 1.0.0 v stringr 1.4.0 ## v readr 1.3.1 v forcats 0.4.0 |
## -- Conflicts ------------------------------------ tidyverse_conflicts() -- ## x dplyr::filter() masks stats::filter() ## x dplyr::lag() masks stats::lag() |
r library(stringr) library(ggplot2) library(plotly) |
## ## Attaching package: 'plotly' |
## The following object is masked from 'package:ggplot2': ## ## last_plot |
## The following object is masked from 'package:stats': ## ## filter |
## The following object is masked from 'package:graphics': ## ## layout |
import data here
# Load the three input tables from the working directory.
fight_songs <- read.csv(file = "fight-songs.csv")
ipeds <- read_excel(path = "IPEDS_data.xlsx", sheet = "Data")
lookup_table <- read.csv(file = "lookup_table.csv")
| cleaning data (VERSION 1) |
| 1) loop through all values in ipeds and fight_songs |
| 2) add if similar name, otherwise print which names did not work |
# Cleaning, version 1: for every school in fight_songs, look for an IPEDS
# institution whose Name partially matches the school name (pmatch), print the
# pairing that was found, and collect the matching IPEDS rows in cleaned_ipeds.
cleaned_ipeds <- data.frame()
i <- 1 # counter that the lookup-table step further down increments
matched_rows <- integer(0)
for (name in fight_songs$school) {
  # Match once per school and reuse the result for both the test and the index.
  picked_row <- pmatch(name, ipeds$Name, duplicates.ok = FALSE)
  if (!is.na(picked_row)) {
    print(paste(paste("works: ", name), ipeds$Name[picked_row]))
    matched_rows <- c(matched_rows, picked_row)
  }
  # Unmatched names are skipped silently; to list them, add:
  # else print(paste("doesn't work:", name))
}
# Subset once instead of rbind()-ing one row per iteration.
cleaned_ipeds <- rbind(cleaned_ipeds, ipeds[matched_rows, ])
## [1] "works: Baylor Baylor University" ## [1] "works: Iowa State Iowa State University" ## [1] "works: Kansas State Kansas State University" ## [1] "works: Oklahoma State Oklahoma State University-Main Campus" ## [1] "works: Texas Tech Texas Tech University" ## [1] "works: Maryland Maryland University of Integrative Health" ## [1] "works: Michigan State Michigan State University" ## [1] "works: Nebraska Nebraska Wesleyan University" ## [1] "works: Wisconsin Wisconsin Lutheran College" ## [1] "works: Arizona State Arizona State University-Tempe" ## [1] "works: Oregon State Oregon State University" ## [1] "works: Stanford Stanford University" ## [1] "works: Washington State Washington State University" ## [1] "works: Mississippi State Mississippi State University" ## [1] "works: South Carolina South Carolina State University" ## [1] "works: Vanderbilt Vanderbilt University" ## [1] "works: Boston College Boston College" ## [1] "works: Clemson Clemson University" ## [1] "works: Duke Duke University" ## [1] "works: Florida State Florida State University" ## [1] "works: Miami Miami University-Oxford" ## [1] "works: North Carolina State North Carolina State University at Raleigh" ## [1] "works: Pitt Pittsburg State University" ## [1] "works: Syracuse Syracuse University" ## [1] "works: Wake Forest Wake Forest University" |
# Append the IPEDS rows whose positions are listed in lookup_table$Column
# (presumably schools the name matching missed -- confirm against lookup_table.csv).
# Indexing with the whole column at once also handles an empty lookup table,
# where `1:nrow(lookup_table)` would have iterated over c(1, 0).
lookup_rows <- lookup_table$Column
i <- i + length(lookup_rows) # keeps the counter at the value the loop produced
cleaned_ipeds <- rbind(cleaned_ipeds, ipeds[lookup_rows, ])
Cleaning the data (VERSION 2!): added a new column for the IDs in ipeds. This allows for better, more thorough cleaning, because some names weren’t matched correctly by pmatch.
# Cleaning, version 2: pick the IPEDS row for each school by institution ID
# instead of by partial name match.
#
# The key column is normalised first so the lookup below and the later joins
# see the same name ("ID number") and type (double) in both tables.
fight_songs$ID.number <- as.double(fight_songs$ID.number)
names(fight_songs)[names(fight_songs) == "ID.number"] <- "ID number"

song_ids <- fight_songs[["ID number"]]
picked_rows <- match(song_ids, ipeds[["ID number"]])
unmatched <- is.na(picked_rows)
if (any(unmatched)) {
  # Report which IDs failed instead of printing a bare "ERROR" per row.
  warning(
    "No IPEDS row found for ID number(s): ",
    paste(song_ids[unmatched], collapse = ", "),
    call. = FALSE
  )
}

# Rebuild cleaned_ipeds from the ID matches only. Appending to the rows found
# by the name-based pass would duplicate institutions in the joins below.
cleaned_ipeds <- ipeds[unique(picked_rows[!unmatched]), ]
cleaned_ipeds

fight_songs
| joining the data |
# Drop the IPEDS `year` column before joining; fight_songs has its own `year`
# column (the year the song was written).
cleaned_ipeds$year <- NULL

# IPEDS rows on the left, song attributes attached by institution ID. The key
# is named explicitly so a future shared column cannot silently change the join.
join_data <- left_join(cleaned_ipeds, fight_songs, by = "ID number")
## Joining, by = "ID number" |
jessies part
# One row per fight song with the matching IPEDS attributes attached; the join
# key is explicit so it cannot change if the tables gain another shared column.
join_data2 <- left_join(fight_songs, cleaned_ipeds, by = "ID number")
## Joining, by = "ID number"
# List every column available after the join.
names(join_data2)
## [1] "school"
## [2] "conference"
## [3] "song_name"
## [4] "writers"
## [5] "year"
## [6] "student_writer"
## [7] "official_song"
## [8] "contest"
## [9] "bpm"
## [10] "sec_duration"
## [11] "fight"
## [12] "number_fights"
## [13] "victory"
## [14] "win_won"
## [15] "victory_win_won"
## [16] "rah"
## [17] "nonsense"
## [18] "colors"
## [19] "men"
## [20] "opponents"
## [21] "spelling"
## [22] "trope_count"
## [23] "spotify_id"
## [24] "X2019_FB_Wins"
## [25] "X2019_FB_Losses"
## [26] "Niche_Athletic_Rank"
## [27] "Niche_Party_Rank"
## [28] "ID number"
## [29] "Name"
## [30] "ZIP code"
## [31] "Highest degree offered"
## [32] "County name"
## [33] "Longitude location of institution"
## [34] "Latitude location of institution"
## [35] "Religious affiliation"
## [36] "Offers Less than one year certificate"
## [37] "Offers One but less than two years certificate"
## [38] "Offers Associate's degree"
## [39] "Offers Two but less than 4 years certificate"
## [40] "Offers Bachelor's degree"
## [41] "Offers Postbaccalaureate certificate"
## [42] "Offers Master's degree"
## [43] "Offers Post-master's certificate"
## [44] "Offers Doctor's degree - research/scholarship"
## [45] "Offers Doctor's degree - professional practice"
## [46] "Offers Doctor's degree - other"
## [47] "Offers Other degree"
## [48] "Applicants total"
## [49] "Admissions total"
## [50] "Enrolled total"
## [51] "Percent of freshmen submitting SAT scores"
## [52] "Percent of freshmen submitting ACT scores"
## [53] "SAT Critical Reading 25th percentile score"
## [54] "SAT Critical Reading 75th percentile score"
## [55] "SAT Math 25th percentile score"
## [56] "SAT Math 75th percentile score"
## [57] "SAT Writing 25th percentile score"
## [58] "SAT Writing 75th percentile score"
## [59] "ACT Composite 25th percentile score"
## [60] "ACT Composite 75th percentile score"
## [61] "Estimated enrollment, total"
## [62] "Estimated enrollment, full time"
## [63] "Estimated enrollment, part time"
## [64] "Estimated undergraduate enrollment, total"
## [65] "Estimated undergraduate enrollment, full time"
## [66] "Estimated undergraduate enrollment, part time"
## [67] "Estimated freshman undergraduate enrollment, total"
## [68] "Estimated freshman enrollment, full time"
## [69] "Estimated freshman enrollment, part time"
## [70] "Estimated graduate enrollment, total"
## [71] "Estimated graduate enrollment, full time"
## [72] "Estimated graduate enrollment, part time"
## [73] "Associate's degrees awarded"
## [74] "Bachelor's degrees awarded"
## [75] "Master's degrees awarded"
## [76] "Doctor's degrese - research/scholarship awarded"
## [77] "Doctor's degrees - professional practice awarded"
## [78] "Doctor's degrees - other awarded"
## [79] "Certificates of less than 1-year awarded"
## [80] "Certificates of 1 but less than 2-years awarded"
## [81] "Certificates of 2 but less than 4-years awarded"
## [82] "Postbaccalaureate certificates awarded"
## [83] "Post-master's certificates awarded"
## [84] "Number of students receiving an Associate's degree"
## [85] "Number of students receiving a Bachelor's degree"
## [86] "Number of students receiving a Master's degree"
## [87] "Number of students receiving a Doctor's degree"
## [88] "Number of students receiving a certificate of less than 1-year"
## [89] "Number of students receiving a certificate of 1 but less than 4-years"
## [90] "Number of students receiving a Postbaccalaureate or Post-master's certificate"
## [91] "Percent admitted - total"
## [92] "Admissions yield - total"
## [93] "Tuition and fees, 2010-11"
## [94] "Tuition and fees, 2011-12"
## [95] "Tuition and fees, 2012-13"
## [96] "Tuition and fees, 2013-14"
## [97] "Total price for in-state students living on campus 2013-14"
## [98] "Total price for out-of-state students living on campus 2013-14"
## [99] "State abbreviation"
## [100] "FIPS state code"
## [101] "Geographic region"
## [102] "Sector of institution"
## [103] "Level of institution"
## [104] "Control of institution"
## [105] "Historically Black College or University"
## [106] "Tribal college"
## [107] "Degree of urbanization (Urban-centric locale)"
## [108] "Carnegie Classification 2010: Basic"
## [109] "Total enrollment"
## [110] "Full-time enrollment"
## [111] "Part-time enrollment"
## [112] "Undergraduate enrollment"
## [113] "Graduate enrollment"
## [114] "Full-time undergraduate enrollment"
## [115] "Part-time undergraduate enrollment"
## [116] "Percent of total enrollment that are American Indian or Alaska Native"
## [117] "Percent of total enrollment that are Asian"
## [118] "Percent of total enrollment that are Black or African American"
## [119] "Percent of total enrollment that are Hispanic/Latino"
## [120] "Percent of total enrollment that are Native Hawaiian or Other Pacific Islander"
## [121] "Percent of total enrollment that are White"
## [122] "Percent of total enrollment that are two or more races"
## [123] "Percent of total enrollment that are Race/ethnicity unknown"
## [124] "Percent of total enrollment that are Nonresident Alien"
## [125] "Percent of total enrollment that are Asian/Native Hawaiian/Pacific Islander"
## [126] "Percent of total enrollment that are women"
## [127] "Percent of undergraduate enrollment that are American Indian or Alaska Native"
## [128] "Percent of undergraduate enrollment that are Asian"
## [129] "Percent of undergraduate enrollment that are Black or African American"
## [130] "Percent of undergraduate enrollment that are Hispanic/Latino"
## [131] "Percent of undergraduate enrollment that are Native Hawaiian or Other Pacific Islander"
## [132] "Percent of undergraduate enrollment that are White"
## [133] "Percent of undergraduate enrollment that are two or more races"
## [134] "Percent of undergraduate enrollment that are Race/ethnicity unknown"
## [135] "Percent of undergraduate enrollment that are Nonresident Alien"
## [136] "Percent of undergraduate enrollment that are Asian/Native Hawaiian/Pacific Islander"
## [137] "Percent of undergraduate enrollment that are women"
## [138] "Percent of graduate enrollment that are American Indian or Alaska Native"
## [139] "Percent of graduate enrollment that are Asian"
## [140] "Percent of graduate enrollment that are Black or African American"
## [141] "Percent of graduate enrollment that are Hispanic/Latino"
## [142] "Percent of graduate enrollment that are Native Hawaiian or Other Pacific Islander"
## [143] "Percent of graduate enrollment that are White"
## [144] "Percent of graduate enrollment that are two or more races"
## [145] "Percent of graduate enrollment that are Race/ethnicity unknown"
## [146] "Percent of graduate enrollment that are Nonresident Alien"
## [147] "Percent of graduate enrollment that are Asian/Native Hawaiian/Pacific Islander"
## [148] "Percent of graduate enrollment that are women"
## [149] "Number of first-time undergraduates - in-state"
## [150] "Percent of first-time undergraduates - in-state"
## [151] "Number of first-time undergraduates - out-of-state"
## [152] "Percent of first-time undergraduates - out-of-state"
## [153] "Number of first-time undergraduates - foreign countries"
## [154] "Percent of first-time undergraduates - foreign countries"
## [155] "Number of first-time undergraduates - residence unknown"
## [156] "Percent of first-time undergraduates - residence unknown"
## [157] "Graduation rate - Bachelor degree within 4 years, total"
## [158] "Graduation rate - Bachelor degree within 5 years, total"
## [159] "Graduation rate - Bachelor degree within 6 years, total"
## [160] "Percent of freshmen receiving any financial aid"
## [161] "Percent of freshmen receiving federal, state, local or institutional grant aid"
## [162] "Percent of freshmen receiving federal grant aid"
## [163] "Percent of freshmen receiving Pell grants"
## [164] "Percent of freshmen receiving other federal grant aid"
## [165] "Percent of freshmen receiving state/local grant aid"
## [166] "Percent of freshmen receiving institutional grant aid"
## [167] "Percent of freshmen receiving student loan aid"
## [168] "Percent of freshmen receiving federal student loans"
## [169] "Percent of freshmen receiving other loan aid"
## [170] "Endowment assets (year end) per FTE enrollment (GASB)"
## [171] "Endowment assets (year end) per FTE enrollment (FASB)"
# Song length vs. tempo, coloured by conference, with reference lines at the
# median tempo and median duration. The `text` aesthetic is only used by
# plotly as the hover tooltip.
speed_plot <- join_data2 %>%
  mutate(conference = as.factor(conference)) %>%
  ggplot(aes(
    x = sec_duration,
    y = bpm,
    color = conference,
    # Columns are referenced by bare name (not join_data2$...) so the mapping
    # follows the piped data; the second <b> tag is now closed properly.
    text = paste0(
      "<b>Song Name: </b>", song_name, "<br>",
      "<b>School: </b>", school
    )
  )) +
  geom_point() +
  geom_hline(yintercept = median(join_data2$bpm, na.rm = TRUE)) +
  geom_vline(xintercept = median(join_data2$sec_duration, na.rm = TRUE)) +
  xlab("Song Length (Seconds)") +
  ylab("Song Speed (bpm)") +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank())

# Interactive version: show only the custom tooltip text and hide the legend.
speed_plotly <- ggplotly(speed_plot, tooltip = "text") %>%
  hide_legend()
speed_plotly
# Classify each song by tempo (threshold 140 bpm) and duration (threshold 67 s).
# Songs sitting exactly on a threshold count as "Fast" / "Long"; with the strict
# comparisons used before, any such song fell through to "Long & Fast"
# regardless of its other dimension. Rows with a missing bpm or duration that
# cannot be classified get NA.
join_data2 <- join_data2 %>%
  mutate(speed_cat = case_when(
    bpm < 140 & sec_duration < 67 ~ "Short & Slow",
    bpm < 140 & sec_duration >= 67 ~ "Long & Slow",
    bpm >= 140 & sec_duration < 67 ~ "Short & Fast",
    bpm >= 140 & sec_duration >= 67 ~ "Long & Fast"
  ))
# State outlines used as the base layer of the maps below.
map <- map_data("state")

# Convert `year` to numeric via character; values that are not numbers become
# NA (with a coercion warning).
year_as_text <- as.character(join_data2[["year"]])
join_data2[["year"]] <- as.numeric(year_as_text)
## Warning: NAs introduced by coercion
# Map frame: schools whose fight song was written before 1900, labelled with
# the year and coloured by whether a student wrote the song.
# filter() takes logical conditions only, so the bare `join_data2$year`
# argument (an error in current dplyr) is dropped.
filter1900 <- join_data2 %>%
  filter(year < 1900)
# Same level order as the later map frames so the colour scale orders writers
# the same way.
filter1900$student_writer <- factor(filter1900$student_writer, levels = c("Yes", "No", "Unknown"))

map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filter1900,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  # geom_label() has no check_overlap parameter (it was ignored with a
  # warning), so it is no longer passed.
  geom_label(
    data = filter1900,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0, nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none", axis.text.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
# Map frame: points for every song written before 1910; year labels only for
# the songs added in this frame (1900-1909).
# filter() takes logical conditions only, so the bare `join_data2$year`
# argument (an error in current dplyr) is dropped.
filter1905T <- join_data2 %>%
  filter(year < 1910)
filter1905T$student_writer <- factor(filter1905T$student_writer, levels = c("Yes", "No", "Unknown"))
# Inclusive lower bound: a song dated exactly 1900 is in neither the previous
# frame (< 1900) nor the old `> 1900` window, so it was never labelled.
filter1905 <- filter1905T %>%
  filter(year >= 1900)

map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filter1905T,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  # geom_label() has no check_overlap parameter (it was ignored with a
  # warning), so it is no longer passed.
  geom_label(
    data = filter1905,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0, nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none", axis.text.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_label).
# Map frame: points for every song written before 1915; year labels only for
# the songs added in this frame (1910-1914).
# filter() takes logical conditions only, so the bare `join_data2$year`
# argument (an error in current dplyr) is dropped.
filterT <- join_data2 %>%
  filter(year < 1915)
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))
# Inclusive lower bound so a song dated exactly 1910 is labelled in the frame
# where it first appears.
filter <- filterT %>%
  filter(year >= 1910)

map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  # geom_label() has no check_overlap parameter (it was ignored with a
  # warning), so it is no longer passed.
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0, nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none", axis.text.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 1 rows containing missing values (geom_point).
# Map frame: points for every song written before 1920; year labels only for
# the songs added in this frame (1915-1919).
# filter() takes logical conditions only, so the bare `join_data2$year`
# argument (an error in current dplyr) is dropped.
filterT <- join_data2 %>%
  filter(year < 1920)
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))
# Inclusive lower bound so a song dated exactly 1915 is labelled in the frame
# where it first appears.
filter <- filterT %>%
  filter(year >= 1915)

map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  # geom_label() has no check_overlap parameter (it was ignored with a
  # warning), so it is no longer passed.
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0, nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none", axis.text.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 1 rows containing missing values (geom_point).
# Map frame: points for every song written before 1930; year labels only for
# the songs added in this frame (1920-1929).
# filter() takes logical conditions only, so the bare `join_data2$year`
# argument (an error in current dplyr) is dropped.
filterT <- join_data2 %>%
  filter(year < 1930)
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))
# Inclusive lower bound so a song dated exactly 1920 is labelled in the frame
# where it first appears.
filter <- filterT %>%
  filter(year >= 1920)

map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  # geom_label() has no check_overlap parameter (it was ignored with a
  # warning), so it is no longer passed.
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0, nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none", axis.text.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_label).
# Map frame: points for every song written before 1940; year labels only for
# the songs added in this frame (1930-1939).
# filter() takes logical conditions only, so the bare `join_data2$year`
# argument (an error in current dplyr) is dropped.
filterT <- join_data2 %>%
  filter(year < 1940)
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))
# Inclusive lower bound so a song dated exactly 1930 is labelled in the frame
# where it first appears.
filter <- filterT %>%
  filter(year >= 1930)

map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  # geom_label() has no check_overlap parameter (it was ignored with a
  # warning), so it is no longer passed.
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0, nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none", axis.text.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_label).
# Map frame: points for every song written before 1950; year labels only for
# the songs added in this frame (1940-1949).
# filter() takes logical conditions only, so the bare `join_data2$year`
# argument (an error in current dplyr) is dropped.
filterT <- join_data2 %>%
  filter(year < 1950)
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))
# Inclusive lower bound so a song dated exactly 1940 is labelled in the frame
# where it first appears.
filter <- filterT %>%
  filter(year >= 1940)

map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  # geom_label() has no check_overlap parameter (it was ignored with a
  # warning), so it is no longer passed.
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0, nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none", axis.text.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 4 rows containing missing values (geom_point).
# Map frame: points for every song written before 1960; year labels only for
# the songs added in this frame (1950-1959).
# filter() takes logical conditions only, so the bare `join_data2$year`
# argument (an error in current dplyr) is dropped.
filterT <- join_data2 %>%
  filter(year < 1960)
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))
# Inclusive lower bound so a song dated exactly 1950 is labelled in the frame
# where it first appears.
filter <- filterT %>%
  filter(year >= 1950)

map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  # geom_label() has no check_overlap parameter (it was ignored with a
  # warning), so it is no longer passed.
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0, nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none", axis.text.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_label).
# Map frame: points for every song written before 1970; year labels only for
# the songs added in this frame (1960-1969).
# filter() takes logical conditions only, so the bare `join_data2$year`
# argument (an error in current dplyr) is dropped.
filterT <- join_data2 %>%
  filter(year < 1970)
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))
# Inclusive lower bound so a song dated exactly 1960 is labelled in the frame
# where it first appears.
filter <- filterT %>%
  filter(year >= 1960)

map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  # geom_label() has no check_overlap parameter (it was ignored with a
  # warning), so it is no longer passed.
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0, nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none", axis.text.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_label).
# Final map frame: the same points as the preceding frame (filterT as left by
# the previous chunk), drawn without year labels.
# Columns are referenced by bare (backticked) name inside aes() rather than
# through filterT$..., so the mapping is evaluated against the layer's data.
map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none", axis.text.x = element_blank(),
    axis.text.y = element_blank(), axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Removed 6 rows containing missing values (geom_point).
# Split songs into "old" (written in or before 1922) and "new", then count
# songs per age group, stacked by whether a student wrote the song.
join_data2 <- join_data2 %>%
  mutate(age = if_else(year <= 1922, "old", "new"))
join_data2$student_writer <- factor(join_data2$student_writer, levels = c("Yes", "No", "Unknown"))

# Changes from the previous version:
# - the bars map `fill`, so the Set1 palette must be applied with a fill scale
#   (the colour scale had no effect);
# - `rm.na = TRUE` is not a ggplot() argument and was silently ignored, so it
#   is dropped. Songs with an unknown year still appear as an NA bar.
# - group_by() has no effect on a ggplot and is dropped.
join_data2 %>%
  ggplot(aes(x = age, fill = student_writer)) +
  geom_bar() +
  scale_fill_brewer(palette = "Set1")
# Year written by length/speed category, split by student authorship: raw
# points with boxplots drawn on top.
# The earlier group_by(speed_cat) did not affect the plot and left the piped
# data grouped, so it is removed.
join_data2 %>%
  ggplot(aes(x = speed_cat, y = year, color = student_writer)) +
  geom_point() +
  geom_boxplot() +
  scale_colour_brewer(palette = "Set1") +
  xlab("Length & Speed")
## Warning: Removed 5 rows containing non-finite values (stat_boxplot).
## Warning: Removed 5 rows containing missing values (geom_point).
# 2019 football wins against Niche athletic rank, coloured by how many times
# the song says "fight".
# Set1 provides at most 9 colours and number_fights has more distinct values
# than that (brewer.pal warned "n too large"), so a discrete viridis scale,
# which has no level limit, is used instead.
join_data2 %>%
  ggplot(aes(x = Niche_Athletic_Rank, y = X2019_FB_Wins, color = as.factor(number_fights))) +
  geom_point() +
  scale_colour_viridis_d()
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 3 rows containing missing values (geom_point).
# Niche athletic rank against 2013-14 tuition and fees, coloured by the
# length/speed category.
# The column is referenced by backticked name inside aes() instead of
# join_data2$..., so the mapping follows the piped data.
join_data2 %>%
  ggplot(aes(x = `Tuition and fees, 2013-14`, y = Niche_Athletic_Rank, color = speed_cat)) +
  geom_point()
## Warning: Removed 6 rows containing missing values (geom_point).
# Four-year graduation rate against Niche party rank, coloured by whether the
# song contains nonsense syllables.
# The column is referenced by backticked name inside aes() instead of
# join_data2$..., so the mapping follows the piped data.
join_data2 %>%
  ggplot(aes(
    x = Niche_Party_Rank,
    y = `Graduation rate - Bachelor degree within 4 years, total`,
    color = nonsense
  )) +
  geom_point()
## Warning: Removed 6 rows containing missing values (geom_point).
# Share of women in total enrollment, split by whether the song mentions men
# or boys.
# The column is referenced by backticked name inside aes() instead of
# join_data2$..., so the mapping follows the piped data.
join_data2 %>%
  ggplot(aes(x = men, y = `Percent of total enrollment that are women`)) +
  geom_boxplot() +
  xlab("Does the song refer to either men or boys?") +
  ylab("Percentage of Students who are Female")
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).
# SAT Writing 75th-percentile score, split by whether the song spells out words.
# The column is referenced by backticked name inside aes() instead of
# join_data2$..., so the mapping follows the piped data.
join_data2 %>%
  ggplot(aes(x = spelling, y = `SAT Writing 75th percentile score`, color = spelling)) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  xlab("Does the song spell out words?") +
  ylab("SAT 75th Percentile Score")
## Warning: Removed 28 rows containing missing values (geom_point).
# SAT Writing 25th-percentile score, split by whether the song spells out words.
# The column is referenced by backticked name inside aes() instead of
# join_data2$..., so the mapping follows the piped data.
join_data2 %>%
  ggplot(aes(x = spelling, y = `SAT Writing 25th percentile score`, color = spelling)) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  xlab("Does the song spell out words?") +
  ylab("SAT 25th Percentile Score")
## Warning: Removed 28 rows containing missing values (geom_point).
# Niche party rank against athletic rank, coloured by length/speed category.
# The earlier group_by(speed_cat) did not affect the plot and left the piped
# data grouped, so it is removed.
join_data2 %>%
  ggplot(aes(x = Niche_Athletic_Rank, y = Niche_Party_Rank, color = speed_cat)) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  xlab("Athletic Ranking") +
  ylab("Party Ranking")
# Reshape the lyric-trope columns (victory through spelling) to long form: one
# row per song and trope, with the trope name in `lyric` and its value in `score`.
join_data_longer <- join_data2 %>%
  pivot_longer(victory:spelling, names_to = "lyric", values_to = "score")

# 2019 football wins by trope value, one panel per trope. facet_wrap() does the
# per-lyric split, so the earlier group_by(lyric) was redundant and is removed.
join_data_longer %>%
  ggplot(aes(x = score, y = X2019_FB_Wins)) +
  geom_boxplot() +
  facet_wrap(~lyric)
| henrys part |
|---|
| matthews part |
# Shorter names for the 2019 football record columns.
join_data <- rename(join_data, Wins = X2019_FB_Wins, Losses = X2019_FB_Losses)

# Party rank against athletic rank: point size scales with wins, shape shows
# student authorship, colour shows conference. Reference lines sit at athletic
# rank 50 and party rank 120.
join_data %>%
  ggplot(aes(x = Niche_Athletic_Rank, y = Niche_Party_Rank)) +
  geom_point(aes(size = Wins / 3, shape = student_writer, color = conference)) +
  xlim(-10, 110) +
  ylim(-10, 270) +
  geom_vline(xintercept = 50) +
  geom_hline(yintercept = 120) +
  labs(
    title = "Athletic rank vs Party rank",
    x = "Athletic Rank",
    y = "Party Rank"
  )
## Warning: Removed 9 rows containing missing values (geom_point). |
| ## For schools with party rank < 100, fight-song authorship varies between students and non-students; for schools with party rank > 100, the songs were mostly written by non-students. ## The biggest party and athletic schools are from the Big Ten and SEC. The smallest ones are from the ACC and the Pac-12. |
anns part (if doing more than cleaning)